* Do file to import and clean census data (by postcode)

* March 2015 (code for 2011). Updated Dec 2018 to include 2006 and 2016 census data. 

clear

set more off


*1. import census csv files, change names and clean

	global files Age Bedrooms Education Employment Income Mortgage Rent Schooling Tenure
	*
	forval year = 2006(10)2016 {
		foreach xx of global files {
		clear
import excel using OrigData/Census`year'postcode/`xx'.xlsx
		drop A
		drop if _n<9
		drop if _n==2 
		drop if strpos(B,"crosses")>0 & strpos(B,"VIC") ==0
		drop if strpos(B,"Cells")>0
		
export excel using OrigData/Census`year'postcode/`xx'_postcode_clean.xls, replace
	
		clear
	
import excel using OrigData/Census`year'postcode/`xx'_postcode_clean.xls, firstrow
		missings dropvars, force

		label variable A "postcode" 
		
		foreach var of varlist _all   {
			local x : variable label `var'
			local new_name = strtoname(abbrev("`x'",32))
			rename `var' `new_name'
			}
		
	drop if postcode=="Total"
	replace postcode = substr(postcode,1,4)
	destring, replace
	drop if postcode ==.
	label data `xx'
save OrigData/Census`year'postcode/`xx'_postcode.dta, replace
	
	clear
	}
	}


	** some files downloaded cross border postcodes separately  - 2016

	foreach xx in Bedrooms Mortgage Rent Tenure {
	
	clear	
	import excel using OrigData/Census2006postcode/`xx'_Crossing.xlsx
	
		drop A
		drop if _n<9
		drop if _n==2 
		drop if strpos(B,"crosses")>0 & strpos(B,"VIC") ==0
		drop if strpos(B,"Cells")>0
		
export excel using OrigData/Census2006postcode/`xx'_Crossing_postcode_clean.xls, replace
	
		clear
	
import excel using OrigData/Census2006postcode/`xx'_Crossing_postcode_clean.xls, firstrow
		missings dropvars, force

		label variable A "postcode" 
		
		foreach var of varlist _all   {
			local x : variable label `var'
			local new_name = strtoname(abbrev("`x'",32))
			rename `var' `new_name'
			}
		
	drop if postcode=="Total"
	replace postcode = substr(postcode,1,4)
	destring, replace

	
	append using OrigData/Census2006postcode/`xx'_postcode.dta
	drop if postcode ==.
	save OrigData/Census2006postcode/`xx'_postcode.dta, replace
		
}
	
	
	
	
	
	
	
	
	
	

	
	
	
	
*** 1. FILES THAT ARE CONSISTENT ACROSS YEARS
	
	
	forval year = 2006(10)2016 {
use OrigData/Census`year'postcode/Age_postcode.dta, clear
	capture drop Total
	egen Total =rowtotal(_*)


	forval x = 0(1)115 {
		capture rename _`x' year`x'
		}
	
	
	* find the bucket of the median 
	gen N_median = Total/2
	gen N_75perc = 0.75*Total
	
	gen median_age = 0
	gen perc75_age = 0
	egen row_sum1 = rowtotal(year0 year1)
	
	
	forval x = 1(1)99{
		local y = `x' + 1 
		local z = `x' + 2
		egen row_sum`y' = rowtotal(row_sum`x' year`y')
		replace median_age = `y' if N_median > row_sum`x' & N_median < row_sum`y'
		replace perc75_age = `y' if N_75perc > row_sum`x' & N_75perc < row_sum`y'
		}
 	
	keep postcode median_ perc75_

save OrigData/Census`year'postcode/AverageAge_postcode.dta, replace

 *  proportion working full time
 
use OrigData/Census`year'postcode/Employment_postcode.dta, clear

	gen PFullTime = Employed__worked_full_time/(Total-Not_stated )
 
	keep postcode PFullTime
 
save OrigData/Census`year'postcode/PFullTime_postcode.dta, replace
 
 

 * tenure	
use  OrigData/Census`year'postcode/Tenure_postcode.dta, clear
	capture drop Total
	capture rename Fully_owned Owned_outright
	capture rename Being_purchased Owned_with_a_mortgage
	egen Total = rowtotal( Owned_outright Owned_with_a_mortgage Being_purchased_* Rented Being_occupied_rent_free Being_occupied_under_a_life_tenu Other_tenure_type)

	gen POwnedOutright= Owned_outright/Total
	gen PRented = Rented/Total
	  
	keep POwnedOutright PRented postcode
  
save OrigData/Census`year'postcode/POwnership_postcode.dta, replace
 
 
 }
 
 


*2. PERSONAL INCOME
use OrigData/Census2006postcode/Income_postcode.dta, clear

* censor negative income to 0.  

	rename Negative_income Inc_1
	rename Nil_income Inc_2
	rename  _1__149 Inc_3
	rename _150__249 Inc_4
	rename _250__399 Inc_5
	rename _400__599 Inc_6
	rename _600__799 Inc_7
	rename _800__999 Inc_8
	rename _1_000__1_299 Inc_9
	rename _1_300__1_599 Inc_10
	rename _1_600__1_999 Inc_11
	rename _2_000_or_more Inc_12

	gen val_inc1 = Inc_1 *(0)
	gen val_inc2 = Inc_2 *0 
	gen val_inc3 = Inc_3 *75
	gen val_inc4 = Inc_4 *200
	gen val_inc5 = Inc_5 *325
	gen val_inc6 = Inc_6 *500
	gen val_inc7 = Inc_7 *700
	gen val_inc8 = Inc_8 *900
	gen val_inc9 = Inc_9 *1150
	gen val_inc10 = Inc_10 *1450
	gen val_inc11 = Inc_11 *1800
	gen val_inc12 = Inc_12 *2500

	egen N_inc = rowtotal(Inc_*)

	egen Total_val_inc = rowtotal(val_*)

	* find the bucket of the median 
	gen N_median = N_inc/2
	gen N_75perc = 0.75*N_inc
	
	gen median_income = 0
	gen perc75_income = 0
	egen row_sum1 = rowtotal(Inc_1 Inc_2)
	
	
	forval x = 1(1)10{
		local y = `x' + 1 
		local z = `x' + 2
		egen row_sum`y' = rowtotal(row_sum`x' Inc_`z')
		replace median_income = (val_inc`z')/Inc_`z' if N_median > row_sum`x' & N_median <= row_sum`y'
		replace perc75_income = (val_inc`z')/Inc_`z' if N_75perc > row_sum`x' & N_75perc <= row_sum`y'
		}
		
	
	label variable median_income "Median weekly income (postcode)"
	label variable perc75_income "75th percentile weekly income (postcode)"
	
	keep postcode median_income perc75_income

save OrigData/Census2006postcode/AverageIncome_postcode.dta, replace


use OrigData/Census2016postcode/Income_postcode.dta, clear

	rename Negative_income Income_neg
	rename Nil_income Income_0
	rename _1__149 Income_1
	rename _150__ Income_2
	rename _300__ Income_3
	rename _400__ Income_4
	rename _500__ Income_5
	rename _650__ Income_6
	rename _800__ Income_7
	rename _1_000_ Income_8
	rename _1_250 Income_9
	rename _1_500 Income_10
	rename _1_750 Income_11
	rename _2_000 Income_12
	rename _3_000 Income_13
	

	capture drop Total

	* not creating variable for negative income or income >5000
	gen val_inc0 = 0
	gen val_inc1 = 75
	gen val_inc2 = 225
	gen val_inc3 = 350
	gen val_inc4 = 500
	gen val_inc5 = 575
	gen val_inc6 = 725
	gen val_inc7 = 900
	gen val_inc8 = 1125
	gen val_inc9 = 1375
	gen val_inc10 = 1675
	gen val_inc11 = 1875
	gen val_inc12 = 2500
	gen val_inc13 = 3500
	

	egen N_inc = rowtotal(Income_*)

	gen N_median = N_inc/2
	gen N_75perc = 0.75*N_inc
		
	gen median_income = 0
	gen perc75_income = 0
	egen row_sum1 = rowtotal(Income_neg Income_0)
		
	forval x = 1(1)13{
			local y = `x' + 1 
			local z = `x' + 2
			egen row_sum`y' = rowtotal(row_sum`x' Income_`x')
			replace median_income = (val_inc`x') if N_median > row_sum`x' & N_median <= row_sum`y'
			replace perc75_income = (val_inc`x') if N_75perc > row_sum`x' & N_75perc <= row_sum`y'
		}
			

	label variable median_income "Median weekly income (postcode)"
	label variable perc75_income "75th percentile weekly income (postcode)"
	
	keep postcode median_income perc75_income	
			
save OrigData/Census2016postcode/AverageIncome_postcode.dta, replace




 * 3. BEDROOMS
 use  OrigData/Census2006postcode/Bedrooms_postcode.dta, clear
 
 * add up all bedrooms > 5 to be consistent with 2011 and 2016. 
   egen _6_or_more = rowtotal(_6_ - _99_ )
   keep postcode None _1_ _2_ _3_ _4_ _5_ _6_or

   	egen Total = rowtotal(None _1_ _2_ _3_ _4_ _5_ _6)

	
 	gen POne_bedroom = _1_/Total 
	gen PTwo_bedrooms = _2_/Total 
	gen PThree_bedrooms = _3_/Total
	gen PFour_bedrooms = _4_/Total
	gen PFive_bedrooms = _5_/Total
	gen PSix_bedrooms = _6_/Total
	gen PFiveormorebedrooms = PFive_bedrooms+ PSix_bedrooms
 
    gen val_1 = _1 *1
	gen val_2 = _2 *2 
	gen val_3 = _3 *3
	gen val_4 = _4 *4
	gen val_5 = _5 *5
	gen val_6 = _6 *6

	egen N_room = rowtotal(_1_ - _6)

	egen Total_val_room = rowtotal(val_*)
	gen av_rooms = Total_val_room/N_room

	* find the median 
	gen N_median = N_room/2
	gen N_75perc = 0.75*N_room
	
	gen median_rooms = 0
	gen perc75_rooms = 0
	
	egen row_sum1 = rowtotal(None _1_)
	
	
	forval x = 1(1)5{
		local y = `x' + 1 
		egen row_sum`y' = rowtotal(row_sum`x' _`y')
		replace median_rooms = (val_`y')/_`y' if N_median > row_sum`x' & N_median <= row_sum`y'
		replace perc75_rooms = (val_`y')/_`y' if N_75perc > row_sum`x' & N_75perc <= row_sum`y'
		}
		
	label variable av_rooms "Mean bedrooms"
	label variable median_rooms "Median bedrooms"
	label variable perc75_rooms "75th percentile bedrooms"
 
 
	keep P* postcode median_rooms av_rooms perc75_rooms
 
 save OrigData/Census2006postcode/PBedrooms_postcode.dta, replace 
 
 use  OrigData/Census2016postcode/Bedrooms_postcode.dta, clear

	capture drop Total
	egen Total = rowtotal(  None One_bedroom Two_bedrooms Three_bedrooms Four_bedrooms Five_bedrooms Six_bedrooms)

	gen POne_bedroom = One_bedroom/Total 
	gen PTwo_bedrooms = Two_bedrooms/Total 
	gen PThree_bedrooms = Three_bedrooms/Total
	gen PFour_bedrooms = Four_bedrooms/Total
	gen PFive_bedrooms = Five_bedrooms/Total
	gen PSix_bedrooms = Six_bedrooms/Total
	gen PFiveormorebedrooms = PFive_bedrooms+ PSix_bedrooms
 
	rename None room_0
	rename One room_1
	rename Two room_2
	rename Three room_3
	rename Four room_4
	rename Five room_5
	rename Six room_6
	
	gen val_room1 = room_1 *1
	gen val_room2 = room_2 *2 
	gen val_room3 = room_3 *3
	gen val_room4 = room_4 *4
	gen val_room5 = room_5 *5
	gen val_room6 = room_6 *6

	egen N_room = rowtotal(room_0-room_6)

	egen Total_val_room = rowtotal(val_*)
	gen av_rooms = Total_val_room/N_room

	* find the median 
	gen N_median = N_room/2
	gen N_75perc = 0.75*N_room
	
	gen median_rooms = 0
	gen perc75_rooms = 0
	
	egen row_sum1 = rowtotal(room_0 room_1)
	
	
	forval x = 1(1)5{
		local y = `x' + 1 
		egen row_sum`y' = rowtotal(row_sum`x' room_`y')
		replace median_rooms = (val_room`y')/room_`y' if N_median > row_sum`x' & N_median <= row_sum`y'
		replace perc75_rooms = (val_room`y')/room_`y' if N_75perc > row_sum`x' & N_75perc <= row_sum`y'
		}
		
	label variable av_rooms "Mean bedrooms"
	label variable median_rooms "Median bedrooms"
	label variable perc75_rooms "75th percentile bedrooms"
 
 
	keep P* postcode median_rooms av_rooms perc75_rooms
 
 save OrigData/Census2016postcode/PBedrooms_postcode.dta, replace

 
 
 
 
 
 
 * 4. MORTGAGE
 
use OrigData/Census2006postcode/Mortgage_postcode.dta, clear
 
	gen N_mortgage = Total-Not_applicable - Not_stated

	rename _1__ pay1
	rename _100__ pay2
	rename _150_ pay3
	rename _250_ pay4
	rename _400_ pay5
	rename _550_ pay6
	rename _650_ pay7
	rename _750_ pay8
	rename _850_ pay9
	rename _950_ pay10
	rename _1_050_ pay11
	rename _1_200_ pay12
	rename _1_400_ pay13
	rename _1_600_ pay14
	rename _2_000_ pay15
	rename _2_400_ pay16
	rename _3_000_ pay17
	rename _4_000_ pay18
	 
	gen repayment1 =50
	gen repayment2 =125
	gen repayment3 =200
	gen repayment4 =325
	gen repayment5 =475
	gen repayment6 =600
	gen repayment7 =700
	gen repayment8 =800
	gen repayment9 =900
	gen repayment10 =1000
	gen repayment11 =1125
	gen repayment12 =1300
	gen repayment13 =1500
	gen repayment14 =1800
	gen repayment15 =2200
	gen repayment16 =2700
	gen repayment17 =3500
	gen repayment18 =4500
	
	
* find the bucket of the median 
	gen N_median = N_mortgage/2
	gen N_75perc = 0.75* N_mortgage
	
	gen median_mortgage = 0
	gen perc75_mortgage = 0
	
	egen row_sum1 = rowtotal(pay1)

	
	forval x = 1(1)17 {
		local y = `x' + 1 
		egen row_sum`y' = rowtotal(row_sum`x' pay`y')
		replace median_mortgage = repayment`y' if N_median > row_sum`x' & N_median <= row_sum`y'
		replace perc75_mortgage = repayment`y' if N_75perc > row_sum`x' & N_75perc <= row_sum`y'
		}
 	
	
	
	keep postcode median_ perc75_

save OrigData/Census2006postcode/MedianMortgage_postcode.dta, replace

 
 
 use OrigData/Census2016postcode/Mortgage_postcode.dta, clear
 
	gen N_mortgage = Total-Not_applicable - Not_stated

	rename Nil pay0
	rename _1__ pay1
	rename _150_ pay2
	rename _300_ pay3
	rename _450_ pay4
	rename _600_ pay5
	rename _800_ pay6
	rename _1_000_ pay7
	rename _1_200_ pay8
	rename _1_400_ pay9
	rename _1_600_ pay10
	rename _1_800_ pay11
	rename _2_000_ pay12
	rename _2_200_ pay13
	rename _2_400_ pay14
	rename _2_600_ pay15
	rename _3_000_ pay16
	rename _4_000_ pay17
	rename _5000_ pay18
	 
	gen repayment0 =0
	gen repayment1 =75
	gen repayment2 =225
	gen repayment3 =375
	gen repayment4 =525
	gen repayment5 =700
	gen repayment6 =900
	gen repayment7 =1100
	gen repayment8 =1300
	gen repayment9 =1500
	gen repayment10 =1700
	gen repayment11 =1900
	gen repayment12 =2100
	gen repayment13 =2300
	gen repayment14 =2500
	gen repayment15 =2800
	gen repayment16 =3500
	gen repayment17 =4500
	gen repayment18 =5500
	
	
* find the bucket of the median 
	gen N_median = N_mortgage/2
	gen N_75perc = 0.75* N_mortgage
	
	gen median_mortgage = 0
	gen perc75_mortgage = 0
	
	egen row_sum1 = rowtotal(pay1)

	
	forval x = 1(1)17 {
		local y = `x' + 1 
		egen row_sum`y' = rowtotal(row_sum`x' pay`y')
		replace median_mortgage = repayment`y' if N_median > row_sum`x' & N_median <= row_sum`y'
		replace perc75_mortgage = repayment`y' if N_75perc > row_sum`x' & N_75perc <= row_sum`y'
		}
 	
	
	
	keep postcode median_ perc75_

save OrigData/Census2016postcode/MedianMortgage_postcode.dta, replace


 *5. RENT
 
use OrigData/Census2006postcode/Rent_postcode.dta, clear

	gen N_rent = Total-Not_applicable - Not_stated

	rename _0__ pay1
	rename _50__ pay2
	rename _75_  pay3
	rename _100_ pay4
	rename _120_ pay5
	rename _140_ pay6
	rename _160_ pay7
	rename _180_ pay8
	rename _200_ pay9
	rename _225_ pay10
	rename _250_ pay11
	rename _275_ pay12
	rename _300_ pay13
	rename _350_ pay14
	rename _450_ pay15
	rename _550_ pay16
	
	gen rent1 = 25
	gen rent2 = 62.5
	gen rent3 = 87.5
	gen rent4 = 110
	gen rent5 = 130
	gen rent6 = 150
	gen rent7 = 170
	gen rent8 = 190
	gen rent9 = 212.5
	gen rent10 = 237.5
	gen rent11 = 262.5
	gen rent12 = 287.5
	gen rent13 = 325
	gen rent14 = 400
	gen rent15 = 500
	gen rent16 = 600

	
	gen N_median = N_rent/2
	gen N_75perc = 0.75* N_rent
	
	gen median_rent = 0
	gen perc75_rent = 0
	
	egen row_sum1 = rowtotal(pay1)

	
	forval x = 1(1)15 {
		local y = `x' + 1 
		egen row_sum`y' = rowtotal(row_sum`x' pay`y')
		replace median_rent = rent`y' if N_median > row_sum`x' & N_median <= row_sum`y'
		replace perc75_rent = rent`y' if N_75perc > row_sum`x' & N_75perc <= row_sum`y'
		}
 	
	keep postcode median_ perc75_

save OrigData/Census2006postcode/MedianRent_postcode.dta, replace

 
use OrigData/Census2016postcode/Rent_postcode.dta, clear

	gen N_rent = Total-Not_applicable - Not_stated

	rename Nil pay0
	rename _1__ pay1
	rename _75__ pay2
	rename _100_  pay3
	rename _125_ pay4
	rename _150_ pay5
	rename _175_ pay6
	rename _200_ pay7
	rename _225_ pay8
	rename _250_ pay9
	rename _275_ pay10
	rename _300_ pay11
	rename _325_ pay12
	rename _350_ pay13
	rename _375_ pay14
	rename _400_ pay15
	rename _425_ pay16
	rename _450_ pay17
	rename _550_ pay18
	rename _650_ pay19
	rename _750_ pay20
	rename _850_ pay21
	rename _950_ pay22
	
	
	
	
	gen rent0=0
	gen rent1 = 35
	gen rent2 = 87.5
	gen rent3 = 112.5
	gen rent4 = 137.5
	gen rent5 = 162.5
	gen rent6 = 187.5
	gen rent7 = 212.5
	gen rent8 = 237.5
	gen rent9 = 262.5
	gen rent10 = 287.5
	gen rent11 = 312.5
	gen rent12 = 337.5
	gen rent13 = 362.5
	gen rent14 = 387.5
	gen rent15 = 412.5
	gen rent16 = 437.5
	gen rent17 = 500
	gen rent18 = 600
	gen rent19 = 700
	gen rent20 = 800
	gen rent21 = 900
	gen rent22 = 1000
	

	
	gen N_median = N_rent/2
	gen N_75perc = 0.75* N_rent
	
	gen median_rent = 0
	gen perc75_rent = 0
	
	egen row_sum1 = rowtotal(pay0 pay1)

	
	forval x = 1(1)15 {
		local y = `x' + 1 
		egen row_sum`y' = rowtotal(row_sum`x' pay`y')
		replace median_rent = rent`y' if N_median > row_sum`x' & N_median <= row_sum`y'
		replace perc75_rent = rent`y' if N_75perc > row_sum`x' & N_75perc <= row_sum`y'
		}
 	
	keep postcode median_ perc75_

save OrigData/Census2016postcode/MedianRent_postcode.dta, replace

 *6. EDUCATION 
 
 
 use OrigData/Census2006postcode/Education_postcode.dta, clear

 
 gen total_applicable = Total  - Level_of_education_not_stated - Level_of_education_inadequately 
 
 gen PBachelor =(Postgraduate_Degree + Graduate_Diploma + Bachelor)/total_applicable
 
 keep postcode PBachelor 
 
 save OrigData/Census2006postcode/PBachelor_postcode.dta, replace
 
 
 
 
 use OrigData/Census2016postcode/Education_postcode.dta, clear

 
 gen total_applicable = Total  - Level_of_education_not_stated - Level_of_education_inadequately  
 
 gen PBachelor =(Postgraduate_Degree + Graduate_Diploma + Bachelor)/total_applicable
 
 keep postcode PBachelor 
 
 save OrigData/Census2016postcode/PBachelor_postcode.dta, replace
 
 
 
 
 
 
 
 
 
 	
	
	*7. DWELLING
	
	clear
import excel using OrigData/Census2006Postcode/Dwellingstructure.xlsx, cellrange(B9:O662) firstrow
	drop if _n==1
	
	rename B postcode
	destring postcode, replace
	
	rename Separatehouse Separate
	rename Semidetachedroworterraceho Semi_detached__one
	rename E Semi_detached__two_or_more
	
	
	gen TotalHouse = Total-Notstated-Notapplicable
	gen TotalHouse_allresponse = Total - Notapplicable
	
	
	gen PSeparateHouse = Separate/(TotalHouse)
	gen PSemiDetached = (Semi_detached__one + Semi_detached__two_or_more)/(TotalHouse)
	
	keep postcode PSeparateHouse PSemiDetached TotalHouse* Separate
save OrigData/Census2006postcode/PSeparate_postcode.dta, replace
	
	clear
import excel using OrigData/Census2006Postcode/Dwellingstructure_Crossing.xlsx, cellrange(B9:O25) firstrow
	drop if _n==1
	capture drop Total
	drop if strpos(B,"crosses")>0 & strpos(B,"VIC") ==0
	drop if strpos(B,"Cells")>0
	drop if strpos(B,"Total")>0

	rename B postcode
	destring postcode, ignore("," "VIC" "/NSW" "crosses") replace
	
	rename Separatehouse Separate
	rename Semidetachedroworterraceho Semi_detached__one
	rename E Semi_detached__two_or_more
	
	egen Total = rowtotal(Separate - Notapplicable) 
	gen TotalHouse = Total-Notstated-Notapplicable
	gen TotalHouse_allresponse = Total - Notapplicable
	gen PSeparateHouse = Separate/(TotalHouse)
	gen PSemiDetached = (Semi_detached__one + Semi_detached__two_or_more)/(TotalHouse)
	
	keep postcode PSeparateHouse PSemiDetached TotalHouse* Separate
 
	append using OrigData/Census2006postcode/PSeparate_postcode.dta
	drop if postcode==.
	save OrigData/Census2006postcode/PSeparate_postcode.dta, replace
	
	
	
	
	
	
	** import dwelling structure from 2016 census

	clear
import excel using OrigData/Census2016Postcode/Dwellingstructure.xlsx, cellrange(B9:O705) firstrow
	drop if _n==1

	rename B postcode
	destring postcode, ignore("," "VIC") replace
	
	rename Separatehouse Separate
	rename Semidetachedroworterraceho Semi_detached__one
	rename E Semi_detached__two_or_more
	
	egen Total = rowtotal(Separate - Notapplicable) 
	gen TotalHouse = Total-Notstated-Notapplicable
	gen TotalHouse_allresponse = Total - Notapplicable
	gen PSeparateHouse = Separate/(TotalHouse)
	gen PSemiDetached = (Semi_detached__one + Semi_detached__two_or_more)/(TotalHouse)
	
	keep postcode PSeparateHouse PSemiDetached TotalHouse* Separate
 
	
save OrigData/Census2016postcode/PSeparate_postcode.dta, replace
	
	clear
import excel using OrigData/Census2016Postcode/Dwellingstructure_Crossing.xlsx, cellrange(B9:O25) firstrow
	drop if _n==1

	drop if strpos(B,"crosses")>0 & strpos(B,"VIC") ==0
	rename B postcode
	destring postcode, ignore("," "VIC" "/NSW" "crosses") replace
	
	rename Separatehouse Separate
	rename Semidetachedroworterraceho Semi_detached__one
	rename E Semi_detached__two_or_more
	
	egen Total = rowtotal(Separate - Notapplicable) 
	gen TotalHouse = Total-Notstated-Notapplicable
	gen TotalHouse_allresponse = Total - Notapplicable
	gen PSeparateHouse = Separate/(TotalHouse)
	gen PSemiDetached = (Semi_detached__one + Semi_detached__two_or_more)/(TotalHouse)
	
	keep postcode PSeparateHouse PSemiDetached TotalHouse* Separate
 
	append using OrigData/Census2016postcode/PSeparate_postcode.dta
	drop if postcode==.
	save OrigData/Census2016postcode/PSeparate_postcode.dta, replace
	

 
 
**************************************************************

global files1  AverageAge PSeparate POwnership PBedrooms PFullTime MedianMortgage MedianRent PBachelor


*. merge census dta files

forval year = 2006(10)2016 {
use OrigData/Census`year'postcode/AverageIncome_postcode.dta, clear
foreach xx of global files1 {
	
	merge 1:1 postcode using OrigData/Census`year'postcode/`xx'_postcode.dta, nogenerate
	
	*erase OrigData/Census`year'postcode/`xx'_postcode.dta
	}


sort postcode
label data Census_postcode
gen year = `year'
save Data/Census`year'postcode.dta, replace

}

use Data/Census2006postcode.dta, clear
append using Data/Census2016postcode.dta


gen contract_quarter = tq(2006q3) if year ==2006
replace contract_quarter = tq(2016q3) if year ==2016


gen contract_start_month = tm(2006m8) if year ==2006
replace contract_start_month = tm(2016m8) if year ==2016



append using Data/Census2011postcode.dta
* census is taken in august - quarter 3. 


format contract_quarter %tq


drop av_income median_HHincome perc75_HHincome av_age PChildren PNonFamily PProf NStudents PTwoVehicles PNoVehicles PThreeMoreVehicles
sort postcode year 
order postcode year







save Data/Census_postcode.dta, replace
